Denver is an emerging city with a combination of rapid population growth, diverse economic base, infrastructure investments and available lands. It has experienced significant population growth in recent years and has been one of the fastest-growing metropolitan areas in the United States. However, Denver is located near the Rocky Mountains and Great Plains and has a limited amount of land available for development, which increases the pressure for urban expansion and sprawl into surrounding areas. But the Denver metropolitan area still has a significant amount of undeveloped land, particularly to the east and northeast of the city. Also, Denver has been making substantial investments in transportation infrastructure, like highway systems and light rail expansion. As such, Denver is in the face of potential, continuous land use change and urban sprawl.
library(tidyverse)
library(sf)
library(raster)
library(knitr)
library(kableExtra)
library(tidycensus)
library(tigris)
library(FNN)
#library(QuantPsyc) # JE Note: in R 4.1, QuantPsyc package not available.
library(caret)
library(yardstick)
library(pscl)
library(plotROC)
library(ggrepel)
library(pROC)
library(grid)
library(gridExtra)
library(viridis)
library(igraph)
library(ggplot2)
library(dplyr)
plotTheme <- theme(
plot.title = element_text(size=16, face="bold"),
plot.subtitle = element_text(size=12, margin=margin(b=20)),
plot.caption = element_text(size=8, hjust = 1, vjust = 0),
axis.line=element_blank(),
axis.ticks=element_blank(),
axis.text.x=element_text(size=8),
axis.text.y=element_text(size=8),
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.minor=element_blank(),
panel.grid.major = element_line(color="gray", linetype="dotted"),
legend.text = element_text(size = 8),
legend.title = element_text(size = 10, face = "bold"),
legend.title.align = 0.5,
legend.background=element_rect(fill="white"),
legend.box.background = element_rect(color = NA, fill = "white"),
)
mapTheme <- theme(
plot.title = element_text(size=16, face="bold"),
plot.subtitle = element_text(size=12, margin=margin(b=20)),
plot.caption = element_text(size=8, hjust = 1, vjust = 0),
axis.line=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_line(colour = 'transparent'),
panel.grid.minor=element_blank(),
legend.direction = "vertical",
legend.position = "right",
plot.margin = margin(1, 1, 1, 1, 'cm'),
legend.key.height = unit(1, "cm"),
legend.key.width = unit(0.2, "cm"),
legend.text = element_text(size = 8),
legend.title = element_text(size = 10, face = "bold"),
legend.title.align = 0.5,
legend.background=element_rect(fill="white"),
legend.box.background = element_rect(color = NA, fill = "white"),
legend.key = element_rect(color = NA, fill = "white"),
legend.key.size=unit(1.2, "lines"),
legend.spacing=unit(0.4, "cm"),
panel.spacing=unit(1, "lines")
)
# 2 colors
palette2 <- c( "#fcc5c0", "#f768a1")
# 4 colors
palette4<- c("#fee5d9", "#FCC5C0", "#fcae91", "#fa9fb5", "#f768a1")
# 5 colors
palette5 <- c("#FAA1B6", "#FCC5C0", "#FEE5D9", "#9E9AC8", "#54278F")
# 10 colors
palette10 <- c("#fff7f3", "#fde0dd", "#fcc5c0", "#fa9fb5", "#f768a1",
"#dd3497", "#ae017e", "#7a0177", "#49006a", "#f7f7f7")
#this function converts a column in to quintiles. It is used for mapping.
quintileBreaks <- function(df,variable) {
as.character(quantile(df[[variable]],
c(.01,.2,.4,.6,.8),na.rm=T))
}
#This function can be used to convert a polygon sf to centroids xy coords.
xyC <- function(aPolygonSF) {
as.data.frame(
cbind(x=st_coordinates(st_centroid(aPolygonSF))[,1],
y=st_coordinates(st_centroid(aPolygonSF))[,2]))
}
#this function convert a raster to a data frame so it can be plotted in ggplot
rast <- function(inRaster) {
data.frame(
xyFromCell(inRaster, 1:ncell(inRaster)),
value = getValues(inRaster)) }
Land cover in Denver between 2011 and 2019 highlights substantial changes in land use, particularly in north-east corner areas such as Central Park, Green Valley Ranch, and the vicinity of Denver International Airport. In contrast, other parts of the city have experienced relatively minor changes in land use during the same period.
denver_boundary <-
st_read("/Users/hbliu/Desktop/2023Spring/CPLN675_Land_Use_Modeling/Finalproject/county_boundary/county_boundary.shp") # JE Note: added 'ESRI:' in front of projection
lc_2011 <- raster("/Users/hbliu/Desktop/2023Spring/CPLN675_Land_Use_Modeling/Finalproject/nlcd_2011_land_cover/nlcd_2011_land_cover.tif")
lc_2019 <- raster("/Users/hbliu/Desktop/2023Spring/CPLN675_Land_Use_Modeling/Finalproject/nlcd_2019_land_cover/nlcd_2019_land_cover.tif")
reclassMatrix <-
matrix(c(
0,12,0,
12,24,1,
24,Inf,0),
ncol=3, byrow=T)
developed_2011 <-
reclassify(lc_2011,reclassMatrix)
developed_2019 <-
reclassify(lc_2019,reclassMatrix)
development_change <- developed_2011 + developed_2019
hist(development_change,
main = "Distribution of Development Change",
xlab = "Change in Development",
ylab = "Frequency",
col = "#f768a1",
border = "white",
breaks = 20)
development_change
## class : RasterLayer
## dimensions : 1359, 2019, 2743821 (nrow, ncol, ncell)
## resolution : 0.0003349821, 0.000335019 (x, y)
## extent : -105.1943, -104.518, 39.53652, 39.99181 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +no_defs
## source : memory
## names : layer
## values : 0, 2 (min, max)
aggregate(development_change, fact = 2)
## class : RasterLayer
## dimensions : 680, 1010, 686800 (nrow, ncol, ncell)
## resolution : 0.0006699642, 0.000670038 (x, y)
## extent : -105.1943, -104.5177, 39.53619, 39.99181 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +no_defs
## source : memory
## names : layer
## values : 0, 2 (min, max)
development_change[development_change != 1] <- NA
ggplot() +
geom_sf(data = denver_boundary, color = "#f768a1", fill = NA) +
geom_raster(data = rast(development_change) %>% na.omit(),
aes(x, y, fill = as.factor(value)), interpolate = TRUE) +
scale_fill_manual(values = colors, name = "Land Cover Change",
labels = c("No change", "Change")) +
scale_fill_viridis(discrete=TRUE, name ="Land Cover hange") +
labs(title="Development Land Use Change") +
theme() +
mapTheme
lc_change <- development_change
denver_fishnet <-
st_make_grid(denver_boundary, 0.0082) %>%
st_sf()
denver_fishnet <-
denver_fishnet[denver_boundary,]
ggplot() +
geom_sf(data=denver_fishnet, fill="white", color="gray") +
geom_sf(data = denver_boundary, fill = "transparent", color = "#f768a1", size = 1) +
labs(title="Fishnet, 3000 Ft Resolution",
subtitle="Denver, Colorado",
caption="Data Source: Open Data Denver") +
theme_void() +
theme(plot.title = element_text(size=16, face="bold"),
plot.subtitle = element_text(size=12, margin=margin(b=20)),
plot.caption = element_text(size=8),
panel.grid.major = element_line(color="gray", linetype="dotted"))
changePoints <-
rasterToPoints(lc_change) %>%
as.data.frame() %>%
st_as_sf(coords = c("x", "y"), crs = st_crs(denver_fishnet))
fishnet <-
aggregate(changePoints, denver_fishnet, sum) %>%
mutate(layer = ifelse(is.na(layer), 0, 1),
layer = as.factor(layer)) %>%
rename("lc_change" = "layer")
ggplot() +
geom_sf(data=denver_boundary,
color = '#7a0177',
size = 2,
fill = 'NA') +
geom_sf(data=denver_fishnet,
color="white",
size=0.1,
fill="transparent") +
geom_point(data=fishnet,
size=0.5,
aes(x=xyC(fishnet)$x, y=xyC(fishnet)$y, colour=lc_change)) +
scale_colour_manual(values = c("#fcc5c0","#f768a1"),
labels=c("No Change","New Development"),
name = "Land Cover Change") +
labs(title = "Land Cover Development Change",
subtitle = "As Fishnet Centroids") +
mapTheme +
guides(colour = guide_legend(override.aes = list(size=5)))
my_palette <- c("#FEE5D9", "#FCC5C0", "#FAA1B6", "#F768A1", "#DD3497", "#AE017E", "#7A0177", "#49006A", "#9E9AC8", "#807DBA", "#6A51A3", "#54278F", "#3F007D", "#2D004B")
ggplot() +
geom_sf(data=denver_boundary) +
geom_raster(data=rast(lc_2011) %>% na.omit %>% filter(value > 0),
aes(x,y,fill=as.factor(value))) +
scale_fill_manual(values = my_palette, name = "Land Cover") +
labs(title = "Land Cover, 2011",
subtitle="Denver, Colorado",
caption="Data Source: Multi-Resolution Land Characteristics (MRLC) ") +
mapTheme +
theme(legend.direction="horizontal",
legend.key.width = unit(0.2, "cm"),
legend.margin=margin(t=10, b=10),
legend.title.align = 0.5)
ggplot() +
geom_sf(data=denver_boundary) +
geom_raster(data=rast(lc_2019) %>% na.omit %>% filter(value > 0),
aes(x,y,fill=as.factor(value))) +
scale_fill_manual(values = my_palette, name = "Land Cover") +
labs(title = "Land Cover, 2019",
subtitle="Denver, Colorado",
caption="Data Source: Multi-Resolution Land Characteristics (MRLC) ") +
mapTheme +
theme(legend.direction="horizontal",
legend.key.width = unit(0.2, "cm"),
legend.margin=margin(t=10, b=10),
legend.title.align = 0.5)
The table below shows the approach taken to recoded existing land
cover codes into the categories used in our analysis. In the code block
below new rasters are generated and names are applied.
Naming ensures that when the raster is integrated with the fishnet, the
column reflects the appropriate raster.
| Old_Classification | New_Classification |
|---|---|
| Open Space as well as Low, Medium and High Intensity Development | Developed |
| Deciduous, Evergreen, and Mixed Forest | Forest |
| Pasture/Hay and Cultivated Crops | Farm |
| Woody and Emergent Herbaceous Wetlands | Woodlands |
| Barren Land, Dwarf Scrub, and Grassland/Herbaceous | Other Undeveloped |
| Water | Water |
developed <- lc_2011 == 21 | lc_2011 == 22 | lc_2011 == 23 | lc_2011 == 24
forest <- lc_2011 == 41 | lc_2011 == 42 | lc_2011 == 43
farm <- lc_2011 == 81 | lc_2011 == 82
wetlands <- lc_2011 == 90 |lc_2011 == 95
otherUndeveloped <- lc_2011== 52 | lc_2011 == 71 | lc_2011 == 31
water <- lc_2011 == 11
names(developed) <- "developed"
names(forest) <- "forest"
names(farm) <- "farm"
names(wetlands) <- "wetlands"
names(otherUndeveloped) <- "otherUndeveloped"
names(water) <- "water"
aggregateRaster <- function(inputRasterList, theFishnet) {
#create an empty fishnet with the same dimensions as the input fishnet
theseFishnets <- theFishnet %>% dplyr::select()
#for each raster in the raster list
for (i in inputRasterList) {
#create a variable name corresponding to the ith raster
varName <- names(i)
#convert raster to points as an sf
thesePoints <-
rasterToPoints(i) %>%
as.data.frame() %>%
st_as_sf(coords = c("x", "y"), crs = st_crs(theFishnet)) %>%
filter(.[[1]] == 1)
#aggregate to the fishnet
thisFishnet <-
aggregate(thesePoints, theFishnet, length) %>%
mutate(!!varName := ifelse(is.na(.[[1]]),0,1))
#add to the larger fishnet
theseFishnets <- cbind(theseFishnets,thisFishnet)
}
#output all aggregates as one large fishnet
return(theseFishnets)
}
theRasterList <- c(developed,forest,farm,wetlands,otherUndeveloped,water)
aggregatedRasters <-
aggregateRaster(theRasterList, denver_fishnet) %>%
dplyr::select(developed,forest,farm,wetlands,otherUndeveloped,water) %>%
mutate_if(is.numeric,as.factor)
titles <- c("developed" = "Developed",
"forest" = "Forest",
"farm" = "Farm",
"wetlands" = "Wetlands",
"otherUndeveloped" = "Other Undeveloped",
"water" = "Water")
aggregatedRasters %>%
gather(var,value,developed:water) %>%
st_cast("POLYGON") %>% #just to make sure no weird geometries slipped in
mutate(X = xyC(.)$x,
Y = xyC(.)$y) %>%
ggplot() +
geom_sf(data=denver_boundary) +
geom_point(aes(X,Y, colour=as.factor(value)),
size=0.1) +
facet_wrap(~var, labeller = labeller(var = titles)) +
scale_colour_manual(values = c("#fcc5c0","#f768a1"),
labels=c("Other","Land Cover"),
name = "") +
labs(title = "Land Cover Types, 2011",
subtitle = "As fishnet centroids") +
mapTheme +
theme(strip.text = element_text(color = "black",
size = 10,
face = "bold"),
strip.background = element_rect(fill = "#FEE5D9", color = "NA"))
developed_19 <- lc_2019 == 21 | lc_2019 == 22 | lc_2019 == 23 | lc_2019 == 24
forest_19 <- lc_2019 == 41 | lc_2019 == 42 | lc_2019 == 43
wetlands_19 <- lc_2019 == 90 | lc_2019 == 95
otherUndeveloped_19 <- lc_2019 == 52 | lc_2019 == 71 | lc_2019 == 31
water_19 <- lc_2019 == 11
farm_19 <- lc_2019 == 82 | lc_2019 == 81
names(developed_19) <- "developed_19"
names(forest_19) <- "forest_19"
names(farm_19) <- "farm_19"
names(wetlands_19) <- "wetlands_19"
names(otherUndeveloped_19) <- "otherUndeveloped_19"
names(water_19) <- "water_19"
theRasterList_19 <- c(developed_19,forest_19,farm_19,wetlands_19,otherUndeveloped_19,water_19)
aggregatedRasters_19 <-
aggregateRaster(theRasterList_19, denver_fishnet) %>%
dplyr::select(developed_19,forest_19,farm_19,wetlands_19,otherUndeveloped_19,water_19) %>%
mutate_if(is.numeric,as.factor)
new_titles <- c("developed_19" = "Developed",
"forest_19" = "Forest",
"farm_19" = "Farm",
"wetlands_19" = "Wetlands",
"otherUndeveloped_19" = "Other Undeveloped",
"water_19" = "Water")
aggregatedRasters_19 %>%
gather(var,value,developed_19:water_19) %>%
st_cast("POLYGON") %>% #just to make sure no weird geometries slipped in
mutate(X = xyC(.)$x,
Y = xyC(.)$y) %>%
ggplot() +
geom_sf(data=denver_boundary) +
geom_point(aes(X,Y, colour=as.factor(value)),
size=0.2) +
facet_wrap(~var, labeller = labeller(var = new_titles)) +
scale_colour_manual(values = c("#fcc5c0","#f768a1"),
labels=c("Other","Land Cover"),
name = "") +
labs(title = "Land Cover Types, 2019",
subtitle = "As fishnet centroids") +
mapTheme +
theme(strip.text = element_text(color = "black",
size = 10,
face = "bold"),
strip.background = element_rect(fill = "#FEE5D9", color = "NA"))
census_api_key("a9e713a06a0a0f8ec8531e047c9d01e7d9f507d9", overwrite = TRUE, install = TRUE)
denverPop10 <-
get_acs(geography = "tract", variables = "B01003_001", year = 2010,
state = 8, county = "031", geometry = TRUE) %>%
rename(pop_2010 = estimate) %>%
st_transform(st_crs(denver_fishnet))
denverPop18 <-
get_acs(geography = "tract", variables = "B01003_001", year = 2018,
state = 8, county = "031", geometry = TRUE) %>%
rename(pop_2018 = estimate) %>%
st_transform(st_crs(denver_fishnet))
ggplot() +
geom_sf(data = denverPop10, aes(fill=factor(ntile(pop_2010,5))), colour=NA) +
scale_fill_manual(values = palette5,
labels=quintileBreaks(denverPop10,"pop_2010"),
name="Quintile\nBreaks") +
labs(title="Population, Denver: 2011") +
mapTheme +
theme(legend.key.width = unit(0.5, "cm"),
legend.text = element_text(size = 12),
legend.title = element_text(size=14))
ggplot() +
geom_sf(data = denverPop18, aes(fill=factor(ntile(pop_2018,5))), colour=NA) +
scale_fill_manual(values = palette5,
labels=quintileBreaks(denverPop18,"pop_2018"),
name="Quintile\nBreaks") +
labs(title="Population, Denver: 2018") +
mapTheme +
theme(legend.key.width = unit(0.5, "cm"),
legend.text = element_text(size = 12),
legend.title = element_text(size=14))
denver_fishnet <-
denver_fishnet %>%
rownames_to_column("fishnetID") %>%
mutate(fishnetID = as.numeric(fishnetID)) %>%
dplyr::select(fishnetID)
fishnetPopulation10 <-
st_interpolate_aw(denverPop10["pop_2010"], denver_fishnet, extensive=TRUE) %>%
as.data.frame(.) %>%
rownames_to_column(var = "fishnetID") %>%
left_join(denver_fishnet %>%
mutate(fishnetID = as.character(fishnetID)),
., by=c("fishnetID"='fishnetID')) %>%
mutate(pop_2010 = replace_na(pop_2010,0)) %>%
dplyr::select(pop_2010)
fishnetPopulation18 <-
st_interpolate_aw(denverPop18["pop_2018"], denver_fishnet, extensive=TRUE) %>%
as.data.frame(.) %>%
rownames_to_column(var = "fishnetID") %>%
left_join(denver_fishnet %>%
mutate(fishnetID = as.character(fishnetID)),
., by=c("fishnetID"='fishnetID')) %>%
mutate(pop_2018 = replace_na(pop_2018,0)) %>%
dplyr::select(pop_2018)
fishnetPopulation <-
cbind(fishnetPopulation10,fishnetPopulation18) %>%
dplyr::select(pop_2010,pop_2018) %>%
mutate(pop_Change = pop_2018 - pop_2010)
Between 2010 to 2018, the population of Denver has experienced notable spatial changes, particularly in the eastern part of the metropolitan area. The eastward shift in population growth can be attributed to factors such as new residential developments, the availability of more affordable housing options, and the ongoing expansion of transportation infrastructure in the region. In contrast, the western part has seen comparatively stable population figures due to geographic constraints, limited availability of developable land, and a lower rate of new construction projects.
grid.arrange(ggplot() +
geom_sf(data=denverPop10, aes(fill=factor(ntile(pop_2010,5))),colour=NA) +
scale_fill_manual(values = palette5,
labels=substr(quintileBreaks(denverPop10,"pop_2010"),1,4),
name="Quintile\nBreaks") +
labs(title="Population, Denver: 2010",
subtitle="Represented as tracts; Boundaries omitted") +
mapTheme +
theme(legend.key.width = unit(0.5, "cm"),
legend.text = element_text(size = 10),
legend.title = element_text(size=12)),
ggplot() +
geom_sf(data=fishnetPopulation, aes(fill=factor(ntile(pop_2010,5))),colour=NA) +
scale_fill_manual(values = palette5,
labels=substr(quintileBreaks(fishnetPopulation,"pop_2010"),1,4),
name="Quintile\nBreaks") +
labs(title="Population, Denver: 2010",
subtitle="Represented as fishnet gridcells; Boundaries omitted") +
mapTheme +
theme(legend.key.width = unit(0.5, "cm"),
legend.text = element_text(size = 10),
legend.title = element_text(size=12)), ncol=2)
grid.arrange(ggplot() +
geom_sf(data=denverPop18, aes(fill=factor(ntile(pop_2018,5))),colour=NA) +
scale_fill_manual(values = palette5,
labels=substr(quintileBreaks(denverPop18,"pop_2018"),1,4),
name="Quintile\nBreaks") +
labs(title="Population, Denver: 2018",
subtitle="Represented as tracts; Boundaries omitted") +
mapTheme +
theme(legend.key.width = unit(0.5, "cm"),
legend.text = element_text(size = 10),
legend.title = element_text(size=12)),
ggplot() +
geom_sf(data=fishnetPopulation, aes(fill=factor(ntile(pop_2018,5))),colour=NA) +
scale_fill_manual(values = palette5,
labels=substr(quintileBreaks(fishnetPopulation,"pop_2018"),1,4),
name="Quintile\nBreaks") +
labs(title="Population, Denver: 2018",
subtitle="Represented as fishnet gridcells; Boundaries omitted") +
mapTheme +
theme(legend.key.width = unit(0.5, "cm"),
legend.text = element_text(size = 10),
legend.title = element_text(size=12)), ncol=2)
In terms of transportation infrastructure, except for the airport in the northeast corner area, the rest of the region is basically covered by the transportation network. The eastern part of the metropolitan area, characterized by its flat terrain and growing population, enjoys greater accessibility to transportation infrastructure, whereas the western part, constrained by the Rocky Mountains, has fewer options and longer distances to major transportation corridors.
highway <-
st_read("/Users/hbliu/Desktop/2023Spring/CPLN675_Land_Use_Modeling/Finalproject/highways.geojson") %>%
st_transform(st_crs(denver_boundary)) %>%
st_intersection(denver_boundary)
ggplot() +
geom_point(data=fishnet,
aes(x=xyC(fishnet)[,1],
y=xyC(fishnet)[,2],
colour=lc_change),size=0.3) +
geom_sf(data=highway,
color = "#7A0177") +
scale_colour_manual(values = palette2,
labels=c("No Change","New Development")) +
labs(title = "New Development and railroad",
subtitle = "As fishnet centroids") +
mapTheme
highwayPoints_fishnet <-
st_read("/Users/hbliu/Desktop/2023Spring/CPLN675_Land_Use_Modeling/Finalproject/highwayPoints_fishnet/highwayPoints_fishnet.shp") %>%
st_transform(st_crs(denver_fishnet))
## Reading layer `highwayPoints_fishnet' from data source
## `/Users/hbliu/Desktop/2023Spring/CPLN675_Land_Use_Modeling/Finalproject/highwayPoints_fishnet/highwayPoints_fishnet.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 766 features and 13 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -105.11 ymin: 39.61431 xmax: -104.5934 ymax: 39.91771
## Geodetic CRS: WGS 84
highwayPoints_fishnet <- subset(highwayPoints_fishnet, select = -c(OBJECTID, FID_, COUNT, AREA, MIN, MAX,RANGE, STD, SUM, MEDIAN, PCT90)) %>%
rename(distance_highways = MEAN)
highwayPoints_fishnet$distance_highways <- highwayPoints_fishnet$distance_highways * 100000
Accessibility is obtained by measuring the distance from each grid cell to the nearest highway.
ggplot() +
geom_point(data=highwayPoints_fishnet,
aes(x=xyC(highwayPoints_fishnet)[,1],
y=xyC(highwayPoints_fishnet)[,2],
colour=factor(ntile(distance_highways,5))),
size=1,
fill = "transparent") +
scale_colour_manual(values = palette5,
labels=substr(quintileBreaks(highwayPoints_fishnet,"distance_highways"),1,8),
name="Quintile\nBreaks") +
geom_sf(data=highway,
colour = "#7A0177") +
geom_sf(data=denver_boundary,
color = '#7a0177',
size = 2,
fill = 'NA') +
labs(title = "Distance to Highways",
subtitle = "As fishnet centroids; Highway visualized in violet") +
mapTheme
While downtown Denver has remained relatively stable, development along the city’s borders, particularly in the northeastern quadrant, has had a more significant impact. Areas surrounding the airport, for example, are strongly influenced by development in the surrounding area, indicating that the availability of infrastructure and land use are shaping the spatial distribution of urban development.
nn_function <- function(measureFrom,measureTo,k) {
#convert the sf layers to matrices
measureFrom_Matrix <-
as.matrix(measureFrom)
measureTo_Matrix <-
as.matrix(measureTo)
nn <-
get.knnx(measureTo, measureFrom, k)$nn.dist
output <-
as.data.frame(nn) %>%
rownames_to_column(var = "thisPoint") %>%
gather(points, point_distance, V1:ncol(.)) %>%
arrange(as.numeric(thisPoint)) %>%
group_by(thisPoint) %>%
summarize(pointDistance = mean(point_distance)) %>%
arrange(as.numeric(thisPoint)) %>%
dplyr::select(-thisPoint) %>%
pull()
return(output)
}
fishnet$lagDevelopment <-
nn_function(xyC(fishnet),
xyC(filter(aggregatedRasters,developed==1)),
3)
fishnet$lagDevelopment <- fishnet$lagDevelopment * 100000
# Custom function to create labels based on the actual range of values
create_lag_labels <- function(data, variable, n_groups) {
breaks <- cut(data[[variable]], breaks = n_groups, labels = FALSE)
min_values <- tapply(data[[variable]], breaks, min)
max_values <- tapply(data[[variable]], breaks, max)
labels <- sprintf("%.2f - %.2f", min_values, max_values)
return(labels)
}
# Use the custom function to create labels for the lagDevelopment variable
lag_labels <- create_lag_labels(fishnet, "lagDevelopment",5)
ggplot() +
geom_point(
data = fishnet,
aes(
x = xyC(fishnet)[, 1],
y = xyC(fishnet)[, 2],
colour = factor(ntile(lagDevelopment, 5))
),
size = 1
) +
geom_sf(data=denver_boundary,
color = '#7a0177',
size = 2,
fill = 'NA') +
scale_colour_manual(
values = palette5,
labels = lag_labels,
name = "Lag Development Range"
) +
labs(
title = "Spatial Lag to 2011 Development",
subtitle = "As fishnet centroids"
) +
mapTheme
denver_lc_points <-
rasterToPoints(lc_change) %>%
as.data.frame() %>%
st_as_sf(coords = c("x", "y"), crs = st_crs(denver_fishnet))
dat <-
cbind(
fishnet, highwayPoints_fishnet, fishnetPopulation, aggregatedRasters) %>%
st_join(denver_lc_points) %>%
dplyr::select(lc_change, developed, forest, farm, wetlands, otherUndeveloped, water,
pop_2010, pop_2018, pop_Change, distance_highways, lagDevelopment) %>%
st_join(denver_boundary) %>%
mutate(developed10 = ifelse(lc_change == 1 & developed == 1, 0, developed)) %>%
filter(water == 0)
dat <- dat %>%
filter(COUNTY == "Denver")
The analysis reveals that, between 2011 and 2019, new land development in Denver has shifted away from areas in close proximity to highways and towards less accessible locations. Concurrently, development has transitioned to less densely populated regions. These trends indicate a suburban expansion pattern over the past decade.
dat %>%
dplyr::select(distance_highways, lagDevelopment, lc_change) %>%
gather(Variable, Value, -lc_change, -geometry) %>%
ggplot(., aes(factor(lc_change), Value, fill = factor(lc_change)))+
geom_bar(position = "dodge", stat = "summary", fun.y = "mean", width = 0.4) +
facet_wrap(~Variable) +
scale_fill_manual(values = alpha(palette2, 0.5),
labels = c("No Change", "New Development"),
name = "") +
labs(title = "New Development as a Function of the Continuous Variables") +
theme(
strip.text = element_text(color = "black", size = 10, face = "bold"),
strip.background = element_rect(fill = "lightgrey", color = NA),
axis.title.x = element_blank(),
axis.title.y = element_blank()
) +
plotTheme
dat %>%
dplyr::select(pop_2010, pop_2018, pop_Change, lc_change) %>%
gather(Variable, Value, -lc_change, -geometry) %>%
ggplot(., aes(factor(lc_change), Value, fill = factor(lc_change))) +
geom_bar(position = "dodge", stat = "summary", fun.y = "mean", width = 0.5) +
facet_wrap(~Variable) +
scale_fill_manual(values = alpha(palette2, 0.5),
labels = c("No Change", "New Development"),
name = "") +
labs(title = "New Development as a Function of Factor Variables") +
theme(strip.text = element_text(color = "black",
size = 10,
face = "bold"),
axis.title.x = element_blank(),
axis.title.y = element_blank()) +
plotTheme
Developed land has seen a significant increase, accounting for 31.15% of the total land cover change. This suggests that urbanization is occurring at a rapid pace in the Denver metropolitan area.
Farm and other undeveloped lands also show notable conversion rates of 25.99% and 31.83%, respectively. These figures indicate that agricultural and other undeveloped lands are being transformed to support the growing population and urbanization in the region.
Forest and wetlands have comparatively lower conversion rates at 0.3% and 7.34%, respectively. This may suggest that these areas are somewhat protected from the pressures of urbanization or have lower suitability for development.
dat %>%
dplyr::select(lc_change:otherUndeveloped, developed) %>%
gather(Land_Cover_Type, Value, -lc_change, -geometry) %>%
st_set_geometry(NULL) %>%
group_by(lc_change, Land_Cover_Type) %>%
summarize(n = sum(as.numeric(Value))) %>%
ungroup() %>%
mutate(Conversion_Rate = paste0(round(100 * n / sum(n), 2), "%")) %>%
filter(lc_change == 1) %>%
dplyr::select(Land_Cover_Type, Conversion_Rate) %>%
kable(format = "html", col.names = c("Land Cover Type", "Conversion Rate"), align = "c") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = F)
| Land Cover Type | Conversion Rate |
|---|---|
| developed | 31.15% |
| farm | 25.99% |
| forest | 0.3% |
| otherUndeveloped | 31.83% |
| wetlands | 7.34% |
set.seed(3456)
trainIndex <-
createDataPartition(dat$developed, p = .70,
list = FALSE,
times = 1)
datTrain <- dat[ trainIndex,]
datTest <- dat[-trainIndex,]
nrow(dat)
## [1] 8231
The dependent variable is a binary outcome. First, the data is split into training and testing sets. Model 4 is the final model used for prediction.
Model 1 only includes land cover types in 2001.
Model2 added lag development.
Model3 uses the 2010 population.
Model4 uses 2010 and 2018 population.
Model5 uses population change
Model 6 adds highway distance.
Model1 <- glm(lc_change ~ wetlands + forest + farm + otherUndeveloped,
family="binomial"(link="logit"), data = datTrain)
Model2 <- glm(lc_change ~ wetlands + forest + farm + otherUndeveloped + lagDevelopment,
family="binomial"(link="logit"), data = datTrain)
Model3 <- glm(lc_change ~ wetlands + forest + farm + otherUndeveloped + lagDevelopment + pop_2010,
family="binomial"(link="logit"), data = datTrain)
Model4 <- glm(lc_change ~ wetlands + forest + farm + otherUndeveloped + lagDevelopment + pop_2010 +
pop_2018,
family="binomial"(link="logit"), data = datTrain)
Model5 <- glm(lc_change ~ wetlands + forest + farm + otherUndeveloped + lagDevelopment + pop_Change,
family="binomial"(link="logit"), data = datTrain)
Model6 <- glm(lc_change ~ wetlands + forest + farm + otherUndeveloped + lagDevelopment + pop_Change +
distance_highways,
family="binomial"(link="logit"), data = datTrain)
Model4 seems to have a better fit than Model6 as it has a lower residual deviance (966.43 vs. 1112.0) and a lower Akaike Information Criterion (AIC) value (982.43 vs. 1128). Both of these metrics are commonly used to compare the goodness of fit among models, with lower values indicating a better fit. Furthermore, it suggests that pop_2010 and pop_2018 are significant predictors, indicating that population change over time is a crucial factor influencing land cover change.
Oddly, the spatial relationship between new and old developments does not seem to have a strong influence through the model, while the influence of the highway is not so significant as well.
modelList <- paste0("Model", 1:6)
map_dfc(modelList, function(x)pR2(get(x)))[4,] %>%
setNames(paste0("Model",1:6)) %>%
gather(Model,McFadden) %>%
ggplot(aes(Model,McFadden)) +
geom_bar(stat="identity",
width = 0.4,
color = '#7a0177',
size = 0.8,
fill = '#7a0177',
alpha = 0.2) +
labs(title= "McFadden R-Squared by Model") +
plotTheme
## fitting null model for pseudo-r2
## fitting null model for pseudo-r2
## fitting null model for pseudo-r2
## fitting null model for pseudo-r2
## fitting null model for pseudo-r2
## fitting null model for pseudo-r2
summary(Model6)
##
## Call:
## glm(formula = lc_change ~ wetlands + forest + farm + otherUndeveloped +
## lagDevelopment + pop_Change + distance_highways, family = binomial(link = "logit"),
## data = datTrain)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.769e+00 4.315e-01 -6.418 1.38e-10 ***
## wetlands1 -2.690e+00 2.963e-01 -9.076 < 2e-16 ***
## forest1 -4.909e-01 1.036e+00 -0.474 0.63577
## farm1 3.992e+00 2.521e-01 15.839 < 2e-16 ***
## otherUndeveloped1 5.236e+00 2.260e-01 23.171 < 2e-16 ***
## lagDevelopment 4.143e-04 7.135e-04 0.581 0.56145
## pop_Change 1.506e-03 4.749e-04 3.172 0.00151 **
## distance_highways 1.417e-04 6.905e-05 2.052 0.04019 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2774.3 on 5762 degrees of freedom
## Residual deviance: 1112.0 on 5755 degrees of freedom
## AIC: 1128
##
## Number of Fisher Scoring iterations: 8
summary(Model4)
##
## Call:
## glm(formula = lc_change ~ wetlands + forest + farm + otherUndeveloped +
## lagDevelopment + pop_2010 + pop_2018, family = binomial(link = "logit"),
## data = datTrain)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2302138 0.4484361 -2.743 0.00608 **
## wetlands1 -1.0273539 0.3534724 -2.906 0.00366 **
## forest1 0.7263039 1.3334888 0.545 0.58598
## farm1 3.1520853 0.2346134 13.435 < 2e-16 ***
## otherUndeveloped1 4.1735983 0.2350309 17.758 < 2e-16 ***
## lagDevelopment 0.0003652 0.0006358 0.574 0.56563
## pop_2010 -0.0086905 0.0009444 -9.202 < 2e-16 ***
## pop_2018 0.0053349 0.0007003 7.618 2.58e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2774.27 on 5762 degrees of freedom
## Residual deviance: 966.43 on 5755 degrees of freedom
## AIC: 982.43
##
## Number of Fisher Scoring iterations: 8
testSetProbs <-
data.frame(class = datTest$lc_change,
probs = predict(Model4, datTest, type="response"))
ggplot(testSetProbs, aes(probs)) +
geom_density(aes(fill=class), alpha=0.5) +
scale_fill_manual(values = palette2,
labels=c("No Change","New Development")) +
labs(title = "Histogram of test set predicted probabilities - Model4",
x="Predicted Probabilities",y="Density") +
plotTheme
The testSetProbs dataframe: predClass_05 and predClass_17, are new predicted classes based on two different probability thresholds, 0.05 and 0.17, respectively. If the predicted probability (probs) is greater than or equal to the threshold, the predicted class is 1; otherwise, it’s 0.
Then, it calculates several evaluation metrics for each threshold: sensitivity, specificity, and accuracy. Sensitivity (also known as true positive rate) is the proportion of actual positive observations that are correctly classified as such. Specificity (true negative rate) is the proportion of actual negative observations that are correctly classified as such. Accuracy is the proportion of all observations that are correctly classified.
For a threshold of 0.05 (predClass_05), the model has a sensitivity of 0.40, specificity of 1, and accuracy of 0.96. For a threshold of 0.17 (predClass_17), the model has a sensitivity of 0.55, specificity of 1, and accuracy of 0.97.
The increase in sensitivity at the cost of lower specificity when the threshold is raised from 0.05 to 0.17 suggests that there is a trade-off between these two measures. Higher thresholds tend to classify more observations as positive, increasing sensitivity but potentially also increasing false positives, which would decrease specificity. In this case, specificity remains constant, indicating the model is very good at identifying true negatives.
options(yardstick.event_first = FALSE)
testSetProbs <-
testSetProbs %>%
mutate(predClass_05 = as.factor(ifelse(testSetProbs$probs >= 0.05 ,1,0)),
predClass_17 = as.factor(ifelse(testSetProbs$probs >= 0.17 ,1,0)))
testSetProbs %>%
dplyr::select(-probs) %>%
gather(Variable, Value, -class) %>%
group_by(Variable) %>%
summarize(Sensitivity = round(yardstick::sens_vec(class,factor(Value)),2),
Specificity = round(yardstick::spec_vec(class,factor(Value)),2),
Accuracy = round(yardstick::accuracy_vec(class,factor(Value)),2)) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = F)
| Variable | Sensitivity | Specificity | Accuracy |
|---|---|---|---|
| predClass_05 | 0.40 | 1 | 0.96 |
| predClass_17 | 0.55 | 1 | 0.97 |
predsForMap <-
dat %>%
mutate(probs = predict(Model4, dat, type="response") ,
Threshold_5_Pct = as.factor(ifelse(probs >= 0.05 ,1,0)),
Threshold_17_Pct = as.factor(ifelse(probs >= 0.17 ,1,0))) %>%
dplyr::select(lc_change,Threshold_5_Pct,Threshold_17_Pct) %>%
gather(Variable,Value, -geometry) %>%
st_cast("POLYGON")
ggplot() +
geom_point(data=predsForMap,
aes(x=xyC(predsForMap)[,1],
y=xyC(predsForMap)[,2],
colour=Value),
size=0.5) +
facet_wrap(~Variable) +
scale_colour_manual(values = palette2, labels=c("No Change","New Development"),
name="") +
labs(title="Development Predictions - Low Threshold") +
mapTheme
ConfusionMatrix.metrics <-
dat %>%
mutate(probs = predict(Model4, dat, type="response") ,
Threshold_5_Pct = as.factor(ifelse(probs >= 0.05 ,1,0)),
Threshold_17_Pct = as.factor(ifelse(probs >= 0.17 ,1,0))) %>%
mutate(TrueP_05 = ifelse(lc_change == 1 & Threshold_5_Pct == 1, 1,0),
TrueN_05 = ifelse(lc_change == 0 & Threshold_5_Pct == 0, 1,0),
TrueP_17 = ifelse(lc_change == 1 & Threshold_17_Pct == 1, 1,0),
TrueN_17 = ifelse(lc_change == 0 & Threshold_17_Pct == 0, 1,0)) %>%
dplyr::select(., starts_with("True")) %>%
gather(Variable, Value, -geometry) %>%
st_cast("POLYGON")
ggplot(data=ConfusionMatrix.metrics) +
geom_point(aes(x=xyC(ConfusionMatrix.metrics)[,1],
y=xyC(ConfusionMatrix.metrics)[,2],
colour = as.factor(Value)),
size=0.9) +
facet_wrap(~Variable) +
scale_colour_manual(values = palette2, labels=c("Correct","Incorrect"),
name="") +
labs(title="Development Predictions - Low Threshold") + mapTheme
The ROC curve is a graphical representation of the performance of a binary classification model. The curve is created by plotting the True Positive Rate (TPR) against the False Positive Rate (FPR) at various threshold settings.
The AUC represents the probability that a random positive (a location with land cover change) will be ranked higher by the model than a random negative (a location without land cover change). An AUC of 1 indicates perfect classification, while an AUC of 0.5 suggests that the model’s performance is no better than random guessing. In this case, the AUC for Model4 is approximately 0.916, which suggests a high level of model performance in distinguishing between areas with and without land cover change. This is a robust measure of the model’s overall performance, as it considers all possible classification thresholds.
library(plotROC)
library(glue)
testSetProbs <- predict(Model4, newdata = datTest, type = "response")
roc_obj <- roc(datTest$lc_change, testSetProbs)
# Convert ROC data to a data frame for use with ggplot
roc_df <- data.frame(
FPR = 1 - roc_obj$specificities,
TPR = roc_obj$sensitivities
)
# Create the ROC curve plot
ggplot(roc_df, aes(x = FPR, y = TPR)) +
geom_line(color = '#7a0177') +
labs(x = "False Positive Rate",
y = "True Positive Rate",
title = "ROC Curve for Model4",
subtitle = glue("Area Under Curve = {auc(roc_obj)}")) +
geom_abline(intercept = 0, slope = 1, linetype = "dotted") +
plotTheme +
theme(axis.text.x=element_text(size=8),
axis.text.y=element_text(size=8))
In this section, we uses population projections and the proportion of the county’s population in each area, to estimate population change in each area from 2018 to 2027.It first computes the “lagDevelopment” feature, which represents the distance to the nearest developed site in the previous year, for each site in the dataset.
dat <-
dat %>%
mutate(lagDevelopment = nn_function(xyC(.), xyC(filter(.,developed10 == 1)),2))
population_projection <- read.csv("/Users/hbliu/Desktop/2023Spring/CPLN675_Land_Use_Modeling/Finalproject/Population_Projections_in_Colorado.csv") %>%
filter(county == "Denver")
# Group by year and calculate total population
denver_pop <- population_projection %>%
group_by(year) %>%
summarise(totalPopulation = sum(totalPopulation))
# Create a line plot of total population by year with data points and labels
ggplot(denver_pop, aes(x = year, y = totalPopulation)) +
geom_line(color = '#7a0177', group = 1) +
geom_point(color = '#7a0177') +
geom_text(aes(label = totalPopulation), size=3, hjust=-.1) +
labs(x = "Year",
y = "Total Population",
title = "Population Change by Year for Denver",
subtitle = "Data Source: data.colorado") +
scale_x_continuous(breaks = seq(min(denver_pop$year), max(denver_pop$year), by = 1),
labels = as.character(seq(min(denver_pop$year), max(denver_pop$year), by = 1))) +
plotTheme
countyPopulation_2027 <- population_projection %>% filter(year == "2027")
countyPopulation_2018 <- population_projection %>% filter(year == "2018")
denver_2027 <- countyPopulation_2027 %>%
group_by(county) %>%
summarise(totalPopulation_2027 = sum(totalPopulation))
denver_2018 <- countyPopulation_2018 %>%
group_by(county) %>%
summarise(totalPopulation_2018 = sum(totalPopulation))
dat_infill <- dat %>%
left_join(denver_2027, by = c("COUNTY" = "county"))
dat_infill <- dat_infill %>%
left_join(denver_2018, by = c("COUNTY" = "county"))
Then we calculates the proportion of the county’s population in each area in 2018, and uses this to estimate the area’s population in 2027 (assuming the distribution of population within the county remains constant). It computes the population change from 2018 to 2027 for each area. Finally, it predicts the probability of development in each area in 2027 using Model4.
This map can be used to identify areas with the highest predicted demand for development in 2027, which locates in east northern area. But this is a simplified prediction and does not take into account factors like changes in land use policies, economic factors, and infrastructural changes that may affect land cover demand.
dat_infill <- dat_infill %>%
mutate(proportion_of_county_pop = pop_2018 / totalPopulation_2018,
pop_2027.infill = proportion_of_county_pop * totalPopulation_2027,
pop_Change = round(pop_2027.infill - pop_2018),2) %>%
dplyr::select(-proportion_of_county_pop) %>%
mutate(predict_2027.infill = predict(Model4,. , type="response"))
dat_infill %>%
ggplot() +
geom_point(aes(x=xyC(dat_infill)[,1], y=xyC(dat_infill)[,2], colour = factor(ntile(predict_2027.infill,5)))) +
scale_colour_manual(values = palette5,
labels=substr(quintileBreaks(dat_infill,"predict_2027.infill"),1,4),
name="Quintile\nBreaks") +
geom_sf(data=denver_boundary, fill=NA, colour="#7a0177", size=1.3) +
labs(title= "Development Demand in 2027: Predicted Probabilities") +
mapTheme
In this section, we identifies sensitive lands that were
lost between 2011 and 2019. Specifically, it creates a new binary
variable, sensitive_lost19, which is 1 if a polygon was forest or
wetland in 2011 but not in 2019, and 0 otherwise. In essence, this
variable indicates whether sensitive land cover (forest or wetland) was
replaced by something else (likely developed land) during that
period.
dat2 <-
aggregateRaster(theRasterList_19, dat) %>%
dplyr::select(developed_19,forest_19,farm_19,wetlands_19,otherUndeveloped_19,water_19) %>%
st_set_geometry(NULL) %>%
bind_cols(.,dat) %>%
st_sf() %>%
st_cast("POLYGON")
sensitive_land_lost gives an idea of how recent
developments have affected the natural environment, and only one area in
Denver has sensitive lost.
dat2 <-
dat2 %>%
mutate(sensitive_lost19 = ifelse(forest == 1 & forest_19 == 0 |
wetlands == 1 & wetlands_19 == 0,1,0))
ggplot() +
geom_point(data=dat2, size=0.6,
aes(x=xyC(dat2)[,1], y=xyC(dat2)[,2], colour=as.factor(sensitive_lost19))) +
scale_colour_manual(values = palette2,
labels=c("No Change","Sensitive Lost"),
name = "") +
labs(title = "Sensitive lands lost: 2011 - 2019",
subtitle = "As fishnet centroids") +
mapTheme
selected <- dat_infill %>%
dplyr::select(totalPopulation_2027, totalPopulation_2018, pop_2027.infill, COUNTY)
selected <- as.data.frame(selected)
dat2 <- cbind(dat2, selected)
county_specific_metrics <-
dat2 %>%
#predict development demand from our model
mutate(Development_Demand = predict(Model4, dat2, type="response")) %>%
#get a count count of grid cells by county which we can use to calculate rates below
left_join(st_set_geometry(dat, NULL) %>% group_by(COUNTY) %>% summarize(count = n())) %>%
#calculate summary statistics by county
group_by(COUNTY) %>%
summarize(Total_Farmland = sum(farm_19) / max(count),
Total_Forest = sum(forest_19) / max(count),
Total_Wetlands = sum(wetlands_19) / max(count),
Total_Undeveloped = sum(otherUndeveloped_19) / max(count),
Sensitive_Land_Lost = sum(sensitive_lost19) / max(count),
Mean_Development_Demand = mean(Development_Demand),
Population_Change = totalPopulation_2027 - totalPopulation_2018,
Population_Change_Rate = Population_Change /totalPopulation_2027)
Allocation is the final stage of the urban growth modeling process. There are some obvious development opportunities in the northeast corner of Denver city, and there are certain development needs according to the model. For the estimated population data, the population change index of this area is not high, and the accessibility is not so high compared to the current highway network.
Denver <-
dat2 %>%
mutate(Development_Demand = predict(Model4, dat2, type="response"))
landUse <- rbind(
filter(Denver, forest_19 == 1 | wetlands_19 == 1 ) %>%
dplyr::select() %>% mutate(Land_Use = "Not Suitable"),
filter(Denver, developed_19 == 1) %>%
dplyr::select() %>% mutate(Land_Use = "Developed"))
ggplot() +
geom_sf(data=Denver, aes(fill=factor(ntile(Development_Demand,5))), colour=NA) +
geom_point(data=landUse, aes(x=xyC(landUse)[,1],
y=xyC(landUse)[,2], colour=Land_Use),
shape = 15, size = 0.6) +
geom_sf(data=highway, size=2) +
scale_fill_manual(values = palette5, name="Development\nDemand",
labels=substr(quintileBreaks(Denver,"Development_Demand"),1,5)) +
scale_colour_manual(values = c("black","yellow")) +
labs(title = "Development Potential, 2027: Denver") + mapTheme +
guides(fill = guide_legend(order = 1), colour = guide_legend(order = 2))
ggplot() +
geom_sf(data=Denver, aes(fill=factor(ntile(pop_Change,5))), colour=NA) +
geom_point(data=landUse, aes(x=xyC(landUse)[,1],
y=xyC(landUse)[,2], colour=Land_Use),
shape = 15, size = 0.6) +
geom_sf(data=highway,size=2) +
scale_fill_manual(values = palette5, name="Population\nChange",
labels=substr(quintileBreaks(Denver,"pop_Change"),1,5)) +
scale_colour_manual(values = c("black","yellow")) +
labs(title = "Projected Population, 2027 :Denver") + mapTheme +
guides(fill = guide_legend(order = 1), colour = guide_legend(order = 2))